#########################################################################################
# Title: Appendix_B.R
# Author: Yuki Atsusaka (atsusaka@rice.edu)
# Aim: Code to replicate results in Online Appendix B of Atsusaka (2021)
#########################################################################################

# This R file contains codes for multiple tables and figures that appear in Appendix B.
# Figures and tables are listed in the order of appearance in the Appendix section

###########################################################################
# Figure B.1: REGISTRATION RATE BY RACE
###########################################################################

# THE FOLLOWING STATISTICS WERE OBTAINED FROM:
# https://www.sos.la.gov/ElectionsAndVoting/Pages/RegistrationStatisticsStatewide.aspx

rm(list=ls())

# Statewide registered-voter counts, one vector per year, laid out as
# c(Year, Total, White, Black, Other).
rg2019 <- c(2019, 3001240, 1909596, 937114, 154530)
rg2018 <- c(2018, 2976945, 1900205, 927842, 148898)
rg2017 <- c(2017, 2976695, 1904333, 925012, 147350)
rg2016 <- c(2016, 2910086, 1864338, 910562, 135186)
rg2015 <- c(2015, 2879068, 1844975, 901492, 132601)
rg2014 <- c(2014, 2917833, 1877388, 905624, 134821)
rg2013 <- c(2013, 2920374, 1883622, 902823, 133929)
rg2012 <- c(2012, 2860953, 1864517, 868957, 127479)
rg2011 <- c(2011, 2819730, 1844188, 851960, 123582)
rg2010 <- c(2010, 2914443, 1894106, 891897, 128440)
rg2009 <- c(2009, 2900441, 1886699, 888610, 125132)
rg2008 <- c(2008, 2837985, 1873424, 848104, 116457)
rg2007 <- c(2007, 2810558, 1861600, 836307, 112651)
rg2006 <- c(2006, 2879517, 1904965, 857715, 116837)

# Stack the yearly vectors into a 14 x 5 matrix (most recent year first).
rg <- rbind(rg2019, rg2018, rg2017, rg2016, rg2015, rg2014, rg2013,
            rg2012, rg2011, rg2010, rg2009, rg2008, rg2007, rg2006)
colnames(rg) <- c("Year", "Total", "White", "Black", "Other")

# No package has been attached at this point in the script, so the original
# as_tibble() call could not be resolved in a fresh session. A base data frame
# supports every `$` access used below without any dependency.
rg <- as.data.frame(rg)
rownames(rg) <- NULL

# Share of all registered voters in each racial category.
rg$White.p <- rg$White / rg$Total
rg$Black.p <- rg$Black / rg$Total
rg$Other.p <- rg$Other / rg$Total


pdf("Figure_B1.pdf", width = 6, height = 4.5)

# Empty frame first (type = "n"), then one line per racial category.
plot(rg$White.p ~ rg$Year, type = "n", ylim = c(0, 1),
     ylab = "Registration by Race", xlab = "Year")
lines(rg$White.p ~ rg$Year, lty = 2, lwd = 2)
lines(rg$Black.p ~ rg$Year, lty = 3, lwd = 2)
lines(rg$Other.p ~ rg$Year, lty = 1, lwd = 2)
title("Statewide Registration by Race")
legend("topright", legend = c("White", "Black", "Other"), lty = c(2, 3, 1))

dev.off()


###########################################################################
# Table B.2: COMPUTING ePCPs FOR EXTENDED REGRESSIONS
###########################################################################

rm(list=ls())
library(xtable)
library(tidyverse)

dt <- read_csv(here::here("Data_LAMayoral_Appendix.csv"))
is.even <- function(x) x %% 2 == 0

# Subsets of the election data for which the ePCPs are reported.
dt1 <- dt
dt2 <- dt %>% filter(C < 40)                # Majority-white
dt3 <- dt %>% filter(C < 65 & C >= 40)      # Racially balanced
dt4 <- dt %>% filter(C >= 65)               # Majority-minority
dt5 <- dt %>% filter(city_type == "Urban")    # Urban
dt6 <- dt %>% filter(city_type == "Suburban") # Suburban
dt7 <- dt %>% filter(city_type == "Rural")    # Rural
dt8 <- dt %>% filter(incumb_ran == 0)       # Open Races
dt9 <- dt %>% filter(incumb_ran == 1)       # Non-Open Races
dt10 <- dt %>% filter(unopposed == 1)       # Unopposed (uncontested) elections
dt11 <- dt %>% filter(unopposed == 0)       # Contested elections
dt12 <- dt %>% filter(is.even(year))        # On-Cycle
dt13 <- dt %>% filter(!is.even(year))       # Off-Cycle
dt14 <- dt %>% filter(city_council == "AtLarge")  # At-Large City Councils
dt15 <- dt %>% filter(city_council == "District") # District City Councils
dt16 <- dt %>% filter(city_council == "Mixed")    # Mixed City Councils (not tabulated here)
dt17 <- dt %>% filter(year <  1994)         # Before Southern Republican
dt18 <- dt %>% filter(year >= 1994)         # After  Southern Republican

# The label vector below has no entry for the mixed-council subset, so that
# subset is left out of the list and both period subsets (dt17, dt18) are
# included. The original list ended with dt16, dt17, which attached the label
# "Before 1994" to the mixed-council subset and "After 1994" to the pre-1994
# subset, and never used dt18.
dt_list <- list(dt1, dt2, dt3, dt4, dt5,
                dt6, dt7, dt8, dt9, dt10,
                dt11, dt12, dt13, dt14, dt15,
                dt17, dt18
                )

namvec <- c("All Districts",
            "0<C<40",
            "40<C<65",
            "65<C<100",
            "Urban",
            "Suburban",
            "Rural",
            "Open-Seat",
            "Not Open-Seat",
            "Uncontested Elections",
            "Contested Elections",
            "On-Cycle",
            "Off-Cycle",
            "At-Large City Councils",
            "District City Councils",
            "Before 1994",
            "After 1994"
            )

# Guard against the subsets and their labels drifting apart again.
stopifnot(length(dt_list) == length(namvec))

# One character vector per subset: label plus six rounded ePCP percentages.
rows <- vector("list", length(dt_list))

for (i in seq_along(dt_list)) {

  dat <- dt_list[[i]]     # TAKE ONE SUBSET OF DATA

  N <- nrow(dat)
  M <- dat$M      # Racial margin of victory
  C <- dat$C      # % Black voters

  true.run <- dat$run     # OUTCOME I
  true.win <- dat$win     # OUTCOME II

  ############################
  # LOGICAL MODEL
  ############################
  # Geometric mean of M and C, shifted by 50 and passed through the standard
  # normal CDF. The ePCP is the average probability a model assigns to the
  # outcome that was actually observed.
  q.model <- sqrt(M * C) - 50
  p.model <- pnorm(q = q.model, mean = 0, sd = 1)
  ePCP.model.run <- (sum(p.model[true.run == 1]) + sum(1 - p.model[true.run == 0])) / N
  ePCP.model.win <- (sum(p.model[true.win == 1]) + sum(1 - p.model[true.win == 0])) / N

  ############################
  # LINEAR PROBABILITY MODELS
  ############################
  # NOTE(review): predict() without newdata returns fitted values only for the
  # rows used in the fit. If any covariate has missing values the fitted
  # vector is shorter than true.run/true.win -- confirm the data are complete.
  lpm.run <- lm(run ~ M + C +
                      educ_baplus_black + educ_baplus_white + factor(year) +
                      new_electiontime + unopposed + white_over65 + density,
                data = dat)
  p.lpm.run <- predict(lpm.run)
  lpm.win <- lm(win ~ M + C +
                      educ_baplus_black + educ_baplus_white + factor(year) +
                      new_electiontime + unopposed + white_over65 + density,
                data = dat)
  p.lpm.win <- predict(lpm.win)

  ePCP.lpm.run <- (sum(p.lpm.run[true.run == 1]) + sum(1 - p.lpm.run[true.run == 0])) / N
  ePCP.lpm.win <- (sum(p.lpm.win[true.win == 1]) + sum(1 - p.lpm.win[true.win == 0])) / N

  ############################
  # LOGISTIC REGRESSIONS
  ############################
  logit.run <- glm(run ~ M + C +
                         educ_baplus_black + educ_baplus_white + factor(year) +
                         new_electiontime + unopposed + white_over65 + density,
                   data = dat, family = binomial)
  p.logit.run <- predict(logit.run, type = "response")
  logit.win <- glm(win ~ M + C +
                         educ_baplus_black + educ_baplus_white + factor(year) +
                         new_electiontime + unopposed + white_over65 + density,
                   data = dat, family = binomial)
  p.logit.win <- predict(logit.win, type = "response")

  ePCP.logit.run <- (sum(p.logit.run[true.run == 1]) + sum(1 - p.logit.run[true.run == 0])) / N
  ePCP.logit.win <- (sum(p.logit.win[true.win == 1]) + sum(1 - p.logit.win[true.win == 0])) / N

  # OUTPUT: subset label with its size, then percentages to one decimal.
  rows[[i]] <- c(paste0(namvec[i], " (", N, ")"),
                 round(ePCP.model.run * 100, digits = 1),
                 round(ePCP.lpm.run * 100,   digits = 1),
                 round(ePCP.logit.run * 100, digits = 1),
                 round(ePCP.model.win * 100, digits = 1),
                 round(ePCP.lpm.win * 100,   digits = 1),
                 round(ePCP.logit.win * 100, digits = 1))

}

# STACK: bind all rows at once into a seven-column character data frame.
df_insample <- as.data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)


#####################################
# WRITE OUT A TABLE IN LATEX CODE
#####################################
# First three model columns refer to candidate emergence (run), last three to
# victory (win).
names(df_insample) <- c("Subset", "Logical", "LMP", "Logit", "Logical", "LMP", "Logit")
rownames(df_insample) <- NULL
print(df_insample)

# digits needs one entry per column plus one for the row names.
print(xtable(df_insample, digits = c(0, 1, 1, 1, 1, 1, 1, 1)),
      include.rownames = FALSE)


###########################################################################
# Figure B.2: PLOTTING IN-SAMPLE COEFFICIENTS
###########################################################################

rm(list=ls())
library(tidyverse)

dt <- read_csv(here::here("Data_LAMayoral_Appendix.csv"))

# Display labels for the non-year covariates. Year dummies are labelled with
# the year itself; any term not listed here keeps its raw coefficient name.
coef_labels <- c(M                 = "M",
                 C                 = "black%",
                 educ_baplus_black = "blackBA%",
                 educ_baplus_white = "whiteBA%",
                 new_electiontime  = "election_cycle",
                 unopposed         = "unopposed",
                 white_over65      = "whiteOver65%",
                 density           = "HumanDensity")

# Draw one coefficient panel: point estimates sorted from smallest to largest
# with 1.96 * SE intervals, intercept excluded.
#   fit  : a fitted lm/glm object
#   main : panel title
# Estimates, standard errors and labels are all taken from the same rows of
# the summary table and reordered with a single index. The original code
# sorted the standard errors independently of the coefficients, so each
# interval was drawn on the wrong coefficient.
plot_coefs <- function(fit, main) {
  tab <- summary(fit)$coefficients
  tab <- tab[rownames(tab) != "(Intercept)", , drop = FALSE]

  labs <- sub("factor(year)", "", rownames(tab), fixed = TRUE)
  known <- labs %in% names(coef_labels)
  labs[known] <- unname(coef_labels[labs[known]])

  ord  <- order(tab[, 1])
  est  <- tab[ord, 1]
  se   <- tab[ord, 2]
  labs <- labs[ord]
  pos  <- seq_along(est)

  plot(est, pos, type = "n", xlab = "Coefficient", ylab = "", yaxt = "n")
  abline(h = pos, col = "gray80", lty = 2)
  points(est, pos, pch = 16)
  arrows(x0 = est - 1.96 * se,
         x1 = est + 1.96 * se,
         y0 = pos, y1 = pos, length = 0, col = "firebrick4")
  abline(v = 0, col = "midnightblue", lwd = 2)
  axis(2, at = pos, labels = labs, las = 2, cex.axis = 0.6)
  title(main, line = 0.3)
  invisible(NULL)
}

pdf(here::here("Figure_B2.pdf"), width = 8, height = 5)
par(mar = c(3, 5, 1, 1), oma = c(0, 1, 0.5, 0), mfrow = c(1, 2))

######################################################
# Linear Probability Model
######################################################
g2 <- lm(run ~ M + C +
         educ_baplus_black + educ_baplus_white + factor(year) +
         new_electiontime + unopposed + white_over65 + density, data = dt)
plot_coefs(g2, "Linear Probability Model")

######################################################
# Logistic regression
######################################################
g <- glm(run ~ M + C +
         educ_baplus_black + educ_baplus_white + factor(year) +
         new_electiontime + unopposed + white_over65 + density,
         data = dt, family = binomial)
plot_coefs(g, "Logistic Regression")

dev.off()
# Restore default margins on whichever device is current after closing the
# PDF (par() opens a new default device if none is open).
par(mar=c(5.1, 4.1, 4.1, 2.1), oma=c(0,0,0,0))


###########################################################################
# Figure B.3: (NOT) PREDICTING THE NUMBER OF MINORITY CANDIDATES
###########################################################################

rm(list=ls())
library(MASS)
library(scales)
library(poweRlaw)
library(tidyverse)

# Keep only elections with a non-zero number of Black candidates.
dt <- read_csv(here::here("Data_LAMayoral_Appendix.csv")) %>%
      filter(num_black_cand!=0)

pblack <- dt$C
margin <- dt$M
num.ob <- dt$num_black_cand

# COMPUTING MODEL PREDICTION
M_vec <- seq(from=0, to=100, by=1)     # Racial Margin of Victory
C_vec <- seq(from=0, to=100, by=1)     # % Minority Voters

# 101 x 101 grid of predicted probabilities, rows indexed by C and columns by
# M: Prob(minority candidate emergence) = pnorm(sqrt(M * C) - 50).
Mt <- outer(C_vec, M_vec,
            function(c_val, m_val) pnorm(q=sqrt(m_val*c_val) - 50, mean=0, sd=1))


pdf(here::here("Figure_B3.pdf"),width=10,height=5.5)
par(mfrow=(c(1,2)))

# (1) VISUALIZE THE DISTRIBUTION OVER THE LOGICAL MODEL RANGE
# Each election is drawn as its candidate count; larger counts get larger text.
plot(0,0, type="n", xaxt="n", xlim=c(-2,102), ylim=c(0,100),
     ylab="C (% Black VAP)", xlab="M (Racial Margin of Victory)")
for(k in c(1:7, 10:11)){
  shown <- num.ob==k
  text(margin[shown], pblack[shown], label=k, cex=(1+k*0.2),
       col=alpha("firebrick4", 0.8))
}
contour(x=M_vec,y=C_vec,z=Mt, col="black", lty = "solid", add = TRUE)
axis(1, at=seq(0, 100, by=25))

# (2) POWER LAW PLOT
# Empirical shares of each candidate count, padded with zeros for the two
# counts (8 and 9) that are skipped in the label loop above.
prop <- table(num.ob)/length(num.ob)
prop <- c(prop[1:7],0,0,prop[8:9])
xax <- 1:11
xtc <- log(xax)
ytc <- log(c(0.1,0.01,0.001))
ln.num <- sort(log(xax))
ln.prop <- sort(log(prop),decreasing = TRUE)

# Fit a discrete power law to the candidate counts and fix xmin at its estimate.
m_pl <- displ$new(num.ob)
est_pl <- estimate_xmin(m_pl)
m_pl$setXmin(est_pl)
m_pl$pars                 # ESTIMATED ALPHA

plot(m_pl, pch=16,col=alpha("firebrick4", 0.8),
     xlab="Rank(# Black Candidates)",ylab="CDF")
lines(m_pl, lwd=2)
# The annotation values below are hard-coded, not computed in this script.
text(1.2,0.007,labels=expression(paste(alpha, "=4.27")))
text(1.3,0.005,labels="p-val=0.79")
text(2.2,0.003,labels=expression(paste("(", H[0], ": Data follows the power law)")))

dev.off()



###########################################################################
# Table B.3: RESULTS OF COUNT REGRESSIONS
###########################################################################

# Observations with fewer than 10 Black candidates (outliers excluded).
not_outlier <- num.ob<10
pblack.x <- pblack[not_outlier]
margin.x <- margin[not_outlier]
num.x <- num.ob[not_outlier]

# Poisson and negative-binomial count models, on the full sample (1, 2) and
# on the sample without outliers (3, 4).
summary(glm(num.ob ~ margin + pblack, family="poisson"))    # REGRESSION 1
summary(glm.nb(num.ob ~ margin + pblack))                   # REGRESSION 2
summary(glm(num.x ~ margin.x + pblack.x, family="poisson")) # REGRESSION 3
summary(glm.nb(num.x ~ margin.x + pblack.x))                # REGRESSION 4


###########################################################################
# Table B.4: PLACEBO TESTS WITH WOMEN CANDIDATE EMERGENCE AND VICTORY
###########################################################################

rm(list=ls())
library(xtable)
library(tidyverse)

# Placebo outcomes: the run/win indicators are overwritten with the
# women-candidate versions, so everything below is scored against those.
dt <- read_csv(here::here("Data_LAMayoral_Appendix.csv")) %>%
      mutate(run = woman_run,
             win = woman_win)
is.even <- function(x) x %% 2 == 0

# Subsets to tabulate, keyed by the label printed in the table. Keeping the
# label next to its filter means the two cannot drift out of step.
dt_list <- list(
  "All Districts"         = dt,
  "0<C<40"                = dt %>% filter(C < 40),             # Majority-white
  "40<C<65"               = dt %>% filter(C < 65 & C >= 40),   # Racially balanced
  "65<C<100"              = dt %>% filter(C >= 65),            # Majority-minority
  "Urban"                 = dt %>% filter(city_type == "Urban"),
  "Suburban"              = dt %>% filter(city_type == "Suburban"),
  "Rural"                 = dt %>% filter(city_type == "Rural"),
  "Open-Seat"             = dt %>% filter(incumb_ran == 0),
  "Not Open-Seat"         = dt %>% filter(incumb_ran == 1),
  "Uncontested Elections" = dt %>% filter(unopposed == 1),
  "Contested Elections"   = dt %>% filter(unopposed == 0),
  "On-Cycle"              = dt %>% filter(is.even(year)),
  "Off-Cycle"             = dt %>% filter(!is.even(year)),
  "At-Large (Councils)"   = dt %>% filter(city_council == "AtLarge"),
  "District (Councils)"   = dt %>% filter(city_council == "District"),
  "Mixed (Councils)"      = dt %>% filter(city_council == "Mixed"),
  "Before 1994"           = dt %>% filter(year <  1994),
  "After 1994"            = dt %>% filter(year >= 1994)
)
namvec <- names(dt_list)

# Expected percent correctly predicted, as a percentage rounded to one
# decimal: the average probability assigned to the observed outcome.
#   p : predicted probabilities, y : observed 0/1 outcome, n : subset size
epcp_pct <- function(p, y, n) {
  round((sum(p[y == 1]) + sum(1 - p[y == 0])) / n * 100, digits = 1)
}

# Build one table row for a subset: label with its size, then the ePCP of the
# logical model, the linear probability model and the logit for `run`,
# followed by the same three for `win`.
epcp_row <- function(dat, label) {
  n <- nrow(dat)

  # LOGICAL MODEL: geometric mean of M and C, shifted by 50.
  p_model <- pnorm(q = sqrt(dat$M * dat$C) - 50, mean = 0, sd = 1)

  # LINEAR PROBABILITY MODELS
  p_lpm_run <- predict(lm(run ~ M + C, data = dat))
  p_lpm_win <- predict(lm(win ~ M + C, data = dat))

  # LOGISTIC REGRESSIONS
  p_logit_run <- predict(glm(run ~ M + C, data = dat, family = binomial),
                         type = "response")
  p_logit_win <- predict(glm(win ~ M + C, data = dat, family = binomial),
                         type = "response")

  c(paste0(label, " (", n, ")"),
    epcp_pct(p_model,     dat$run, n),
    epcp_pct(p_lpm_run,   dat$run, n),
    epcp_pct(p_logit_run, dat$run, n),
    epcp_pct(p_model,     dat$win, n),
    epcp_pct(p_lpm_win,   dat$win, n),
    epcp_pct(p_logit_win, dat$win, n))
}

# Compute every row, then bind once into a seven-column character data frame
# (instead of growing a data frame with rbind() inside a loop).
rows <- Map(epcp_row, dt_list, namvec)
df_insample <- as.data.frame(do.call(rbind, unname(rows)),
                             stringsAsFactors = FALSE)

#####################################
# WRITE OUT A TABLE IN LATEX CODE
#####################################
names(df_insample) <- c("Subset", "Logical", "LMP", "Logit", "Logical", "LMP", "Logit")
rownames(df_insample) <- NULL
print(df_insample)

# digits needs one entry per column plus one for the row names.
print(xtable(df_insample, digits = c(0, 1, 1, 1, 1, 1, 1, 1)),
      include.rownames = FALSE)


###########################################################################
# Table B.5: ePCPs FOR LOGICAL MODELS BASED ON MULTIPLE PAST ELECTIONS
###########################################################################

rm(list=ls())
library(xtable)
library(tidyverse)


# Keep only elections for which the margin and both lagged margins are known.
dt <- read_csv(here::here("Data_LAMayoral_Appendix.csv")) %>%
      filter(!is.na(M) & !is.na(M_t2) & !is.na(M_t3))
# Sanity check: number of rows recorded as a win without a candidacy.
dt %>% filter(run == 0 & win == 1) %>% dim()
is.even <- function(x) x %% 2 == 0

dt1 <- dt
dt2 <- dt %>% filter(C < 40)                # Majority-white
dt3 <- dt %>% filter(C < 65 & C >= 40)      # Racially balanced
dt4 <- dt %>% filter(C >= 65)               # Majority-minority
dt5 <- dt %>% filter(city_type == "Urban")    # Urban
dt6 <- dt %>% filter(city_type == "Suburban") # Suburban
dt7 <- dt %>% filter(city_type == "Rural")    # Rural
dt8 <- dt %>% filter(incumb_ran == 0)       # Open Races
dt9 <- dt %>% filter(incumb_ran == 1)       # Non-Open Races
dt10 <- dt %>% filter(unopposed == 1)       # Unopposed (uncontested) elections
dt11 <- dt %>% filter(unopposed == 0)       # Contested elections
dt12 <- dt %>% filter(is.even(year))        # On-Cycle
dt13 <- dt %>% filter(!is.even(year))       # Off-Cycle
dt14 <- dt %>% filter(city_council == "AtLarge")  # At-Large City Councils
dt15 <- dt %>% filter(city_council == "District") # District City Councils
dt16 <- dt %>% filter(city_council == "Mixed")    # Mixed City Councils
dt17 <- dt %>% filter(year <  1994)         # Before Southern Republican
dt18 <- dt %>% filter(year >= 1994)         # After  Southern Republican

dt_list <- list(dt1, dt2, dt3, dt4, dt5,
                dt6, dt7, dt8, dt9, dt10,
                dt11, dt12, dt13, dt14, dt15,
                dt16, dt17, dt18
                )

namvec <- c("All Districts",
            "0<C<40",
            "40<C<65",
            "65<C<100",
            "Urban",
            "Suburban",
            "Rural",
            "Open-Seat",
            "Not Open-Seat",
            "Uncontested",
            "Contested",
            "On-Cycle",
            "Off-Cycle",
            "At-Large (Councils)",
            "District (Councils)",
            "Mixed (Councils)",
            "Before 1994",
            "After 1994"
            )

stopifnot(length(dt_list) == length(namvec))

# One character vector per subset: label plus six rounded ePCP percentages.
rows <- vector("list", length(dt_list))

for (i in seq_along(dt_list)) {

  dat <- dt_list[[i]]     # TAKE ONE SUBSET OF DATA

  N <- nrow(dat)
  M <- dat$M  # Racial margin of victory
  C <- dat$C  # % Black voters

  true.run <- dat$run     # OUTCOME I
  true.win <- dat$win     # OUTCOME II

  ##############################
  # LOGICAL MODEL
  ##############################
  q.model <- sqrt(M * C) - 50 # GEOMETRIC MEAN
  p.model <- pnorm(q = q.model, mean = 0, sd = 1)
  ePCP.model.run <- (sum(p.model[true.run == 1]) + sum(1 - p.model[true.run == 0])) / N
  ePCP.model.win <- (sum(p.model[true.win == 1]) + sum(1 - p.model[true.win == 0])) / N

  #############################
  # MULTIPLE ELECTION MODELS
  #############################
  # Plain vector arithmetic: the original wrapped each sum in
  # sapply(..., FUN = sum), which leaves every element unchanged and returns a
  # list (breaking the division) when a subset has no rows.
  # NOTE(review): the "+ 50" is kept from the original; the single-election
  # model above uses M without this shift -- confirm the scale of M_t2/M_t3.
  M_two   <- (dat$M + dat$M_t2) / 2 + 50             # M based on past TWO elections
  M_three <- (dat$M + dat$M_t2 + dat$M_t3) / 3 + 50  # M based on past THREE elections

  q.model2 <- sqrt(M_two * C) - 50   # GEOMETRIC MEAN
  q.model3 <- sqrt(M_three * C) - 50 # GEOMETRIC MEAN

  p.model2 <- pnorm(q = q.model2, mean = 0, sd = 1)
  ePCP.model2.run <- (sum(p.model2[true.run == 1]) + sum(1 - p.model2[true.run == 0])) / N
  ePCP.model2.win <- (sum(p.model2[true.win == 1]) + sum(1 - p.model2[true.win == 0])) / N

  p.model3 <- pnorm(q = q.model3, mean = 0, sd = 1)
  ePCP.model3.run <- (sum(p.model3[true.run == 1]) + sum(1 - p.model3[true.run == 0])) / N
  ePCP.model3.win <- (sum(p.model3[true.win == 1]) + sum(1 - p.model3[true.win == 0])) / N

  # OUTPUT: subset label with its size, then percentages to one decimal.
  rows[[i]] <- c(paste0(namvec[i], " (", N, ")"),
                 round(ePCP.model.run * 100,  digits = 1),
                 round(ePCP.model2.run * 100, digits = 1),
                 round(ePCP.model3.run * 100, digits = 1),
                 round(ePCP.model.win * 100,  digits = 1),
                 round(ePCP.model2.win * 100, digits = 1),
                 round(ePCP.model3.win * 100, digits = 1))

}

# STACK: bind all rows at once into a seven-column character data frame.
df_insample <- as.data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)


#####################################
# WRITE OUT A TABLE IN LATEX CODE
#####################################
names(df_insample) <- c("Subset", "Logical", "M2", "M3", "Logical", "M2", "M3")
rownames(df_insample) <- NULL
print(df_insample)

# digits needs one entry per column plus one for the row names.
print(xtable(df_insample, digits = c(0, 1, 1, 1, 1, 1, 1, 1)),
      include.rownames = FALSE)


###########################################################################
# Table B.6: MINORITY RETREAT AND HOPELESS ENTRY
###########################################################################

rm(list=ls())
library(tidyverse)

# USING THE LOUISIANA MAYORAL ELECTION DATA
# q.model is the shifted geometric mean of M and C; p.model is its standard
# normal CDF value, i.e. the logical model's predicted probability.
dt <- read_csv(here::here("Data_LAMayoral_Appendix.csv")) %>%
      mutate(q.model = sqrt(M*C) - 50,
             p.model = pnorm(q=q.model, mean=0, sd=1))

# Elections the model rates as favourable (p >= 0.5) or hopeless (p <= 0.1).
# The counts quoted in the comments are the author's recorded outputs.
favourable <- dt %>% filter(p.model>=0.5)
hopeless   <- dt %>% filter(p.model<=0.1)

##########################################
# TESTING THE MINORITY RETREAT HYPOTHESIS:
##########################################

dim(favourable)                           # 312 ELECTIONS
favourable %>% filter(run==0) %>% dim()   #  10 ELECTIONS
# --> 10/312 = 3% MINORITY RETREAT

favourable %>% filter(C<50) %>% dim()           # 18 ELECTIONS
favourable %>% filter(C<50, run==0) %>% dim()   # 2 ELECTIONS
# --> 2/18 = 11.1% MINORITY RETREAT in LESS THAN 50% MINORITY DISTRICTS


favourable %>% filter(C>=50) %>% dim()          # 294 ELECTIONS
favourable %>% filter(C>=50, run==0) %>% dim()  # 8 ELECTIONS
# --> 8/294 = 2.7% MINORITY RETREAT in AT LEAST 50% MINORITY DISTRICTS


#################################################
# TESTING THE MINORITY HOPELESS ENTRY HYPOTHESIS:
#################################################

dim(hopeless)                           # 1712 ELECTIONS
hopeless %>% filter(run==1) %>% dim()   # 212 ELECTIONS
# --> 212/1712 = 12.4% MINORITY ENTER HOPELESSLY (ALL DISTRICTS)

hopeless %>% filter(C<50) %>% dim()           # 1524 ELECTIONS
hopeless %>% filter(C<50, run==1) %>% dim()   # 129 ELECTIONS
# --> 129/1524 = 8.46% MINORITY ENTER HOPELESSLY in LESS THAN 50% MINORITY DISTRICTS

hopeless %>% filter(C>=50) %>% dim()          # 188 ELECTIONS
hopeless %>% filter(C>=50, run==1) %>% dim()  # 83 ELECTIONS
# --> 83/188 = 44% MINORITY ENTER HOPELESSLY in AT LEAST 50% MINORITY DISTRICTS


# USING THE STATE LEGISLATIVE ELECTION DATA
rm(list=ls())
# Arizona is excluded; q.model/p.model are the logical model's score and
# predicted probability, as for the Louisiana data.
dt <- read_csv(here::here("Data_StateLegislative.csv")) %>%
      filter(state!="AZ") %>%
      mutate(q.model = sqrt(M*C) - 50,
             p.model = pnorm(q=q.model, mean=0, sd=1))

# Elections the model rates as favourable (p >= 0.5) or hopeless (p <= 0.1).
# The counts quoted in the comments are the author's recorded outputs.
favourable <- dt %>% filter(p.model>=0.5)
hopeless   <- dt %>% filter(p.model<=0.1)

##########################################
# TESTING THE MINORITY RETREAT HYPOTHESIS:
##########################################

dim(favourable)                                    # 320 ELECTIONS
favourable %>% filter(minority_run==0) %>% dim()   # 6 ELECTIONS
# --> 6/320 = 1.875% MINORITY RETREAT

favourable %>% filter(C<50) %>% dim()                    # 31 ELECTIONS
favourable %>% filter(C<50, minority_run==0) %>% dim()   # 0 ELECTIONS
# --> 0/31 = 0% MINORITY RETREAT in LESS THAN 50% MINORITY DISTRICTS

favourable %>% filter(C>=50) %>% dim()                   # 289 ELECTIONS
favourable %>% filter(C>=50, minority_run==0) %>% dim()  # 6 ELECTIONS
# --> 6/289 = 2.1% MINORITY RETREAT in AT LEAST 50% MINORITY DISTRICTS

#################################################
# TESTING THE MINORITY HOPELESS ENTRY HYPOTHESIS:
#################################################
dim(hopeless)                                    # 972 ELECTIONS
hopeless %>% filter(minority_run==1) %>% dim()   # 103 ELECTIONS
# --> 103/972 = 10.5% MINORITY ENTER HOPELESSLY

hopeless %>% filter(C<50) %>% dim()                    # 902 ELECTIONS
hopeless %>% filter(C<50, minority_run==1) %>% dim()   # 70 ELECTIONS
# --> 70/902 = 7.8% MINORITY ENTER HOPELESSLY in LESS THAN 50% MINORITY DISTRICTS

hopeless %>% filter(C>=50) %>% dim()                   # 70 ELECTIONS
hopeless %>% filter(C>=50, minority_run==1) %>% dim()  # 33 ELECTIONS
# --> 33/70 = 47.1% MINORITY ENTER HOPELESSLY in AT LEAST 50% MINORITY DISTRICTS


###########################################################################
# Figure B.4: PREDICTING THE NUMBER OF MINORITY WINNERS (LOUISIANA)
###########################################################################

rm(list=ls())
library(DescTools) # for Mode function
library(tidyverse)

dt <- read_csv(here::here("Data_LAMayoral_Appendix.csv"))
year <- sort(unique(dt$year))


pdf(here::here("Figure_B4.pdf"), width=12, height=12)
par(mfrow=c(5,6),mar=c(4.5, 2, 4.1, 2))

# One histogram panel per election year.
for(y in year){

  dt.y <- filter(dt, year == y)   # ELECTIONS HELD IN YEAR y

  # LOGICAL MODEL: predicted probability for every election in this year.
  p.model <- pnorm(q=sqrt(dt.y$M*dt.y$C) - 50, mean=0, sd=1)

  # OBSERVED NUMBER OF ELECTIONS WITH win == 1 IN THIS YEAR
  N_true <- sum(dt.y$win)

  # MONTE CARLO SIMULATION: 1000 replications, each drawing one Bernoulli
  # outcome per election (in row order) and counting the successes.
  N_pred <- replicate(1000, sum(vapply(
    p.model,
    function(pr) rbinom(n=1, size=1, prob=pr),
    integer(1)
  )))

  # Distance between the most frequent simulated count and the observed one.
  gap <- Mode(N_pred) - N_true

  # PLOT THE SIMULATION RESULTS; the dashed line marks the observed count.
  hist(N_pred,
       main=paste0(y),
       xlim=c(min(N_pred, N_true), max(N_pred, N_true)),
       xlab=paste0("Modal Gap = ", gap),
       ylab="",
       cex.lab=1.8,
       cex.main=2,
       border=FALSE,
       col="gray50")
  abline(v=N_true, lty=2, col="firebrick4", lwd=3)

} # END OF THE OUTER LOOP

dev.off()


###########################################################################
# Figure B.5: PREDICTING THE NUMBER OF MINORITY WINNERS (STATE LEGISLATURES)
###########################################################################

rm(list=ls())
library(DescTools) # for Mode function
library(tidyverse)

# Missing minority_win values are counted as 0; only rows with unusual == 0
# are kept.
dt <- read_csv(here::here("Data_StateLegislative.csv")) %>%
      mutate(win = ifelse(is.na(minority_win),0,minority_win)) %>%
      filter(unusual==0)
state <- sort(unique(dt$state))


pdf(here::here("Figure_B5.pdf"), width=10, height=10)
par(mfrow=c(6,6),mar=c(4.5, 2, 4.1, 2))

# One histogram panel per state.
for(s in state){

  dt.y <- filter(dt, state == s)   # DISTRICTS IN STATE s

  # LOGICAL MODEL: predicted probability for every district in this state.
  p.model <- pnorm(q=sqrt(dt.y$M*dt.y$C) - 50, mean=0, sd=1)

  # OBSERVED STATEWIDE NUMBER OF DISTRICTS WITH win == 1
  N_true <- sum(dt.y$win)

  # MONTE CARLO SIMULATION: 1000 replications, each drawing one Bernoulli
  # outcome per district (in row order) and counting the successes.
  N_pred <- replicate(1000, sum(vapply(
    p.model,
    function(pr) rbinom(n=1, size=1, prob=pr),
    integer(1)
  )))

  # Distance between the most frequent simulated count and the observed one.
  gap <- Mode(N_pred) - N_true

  # PLOT THE SIMULATION RESULTS; the dashed line marks the observed count.
  hist(N_pred,
       main=paste0(s),
       xlim=c(min(N_pred, N_true), max(N_pred, N_true)),
       xlab=paste0("Modal Gap = ", gap),
       ylab="",
       cex.lab=1.8,
       cex.main=2,
       border=FALSE,
       col="gray50")
  abline(v=N_true, lty=2, col="firebrick4", lwd=3)

} # END OF THE OUTER LOOP

dev.off()





#########################################################################################
# END OF THIS R SOURCE FILE
#########################################################################################